The maps below show Fairfield County, South Carolina. To generate them, first scroll down to the section titled "Run These Cells First" and run all cells below that heading. Then return to the top of this file and run the map cells.
# Map 1: average household size per census block group, with the
# apartment / nursing-home markers layered on top.
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

# Shade each block group by the derived `household_size` column of cbg_data.
m.add_child(
    folium.Choropleth(
        geo_data=geojson,
        name='Household Size',
        data=cbg_data,
        columns=['census_block_group', 'household_size'],
        key_on='feature.properties.CensusBlockGroup',
        fill_color='Reds',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Average Household Size (# of people)',
    )
)

# A small cluster radius keeps nearby buildings visible as separate pins.
marker_cluster = MarkerCluster(options=dict(maxClusterRadius=10))
for building in buildings:
    add_marker(building, marker_cluster)
marker_cluster.add_to(m)

# Display the finished map as the cell output.
m
# Map 2: median household income (census table B19013e1) per census
# block group, with the building markers layered on top.
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

m.add_child(
    folium.Choropleth(
        geo_data=geojson,
        name='Median Household Income',
        data=cbg_data,
        columns=['census_block_group', 'B19013e1'],
        key_on='feature.properties.CensusBlockGroup',
        fill_color='Greens',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Median Household Income ($)',
    )
)

# Collect every building pin in one cluster layer, then attach it to the map.
marker_cluster = MarkerCluster(options=dict(maxClusterRadius=10))
for building in buildings:
    add_marker(building, marker_cluster)
marker_cluster.add_to(m)

# Display the finished map as the cell output.
m
# Map 3: total population (census table B01001e1) per census block
# group, with the building markers layered on top.
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

m.add_child(
    folium.Choropleth(
        geo_data=geojson,
        name='Population',
        data=cbg_data,
        columns=['census_block_group', 'B01001e1'],
        key_on='feature.properties.CensusBlockGroup',
        fill_color='Blues',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name='Population (# of people)',
    )
)

# Collect every building pin in one cluster layer, then attach it to the map.
marker_cluster = MarkerCluster(options=dict(maxClusterRadius=10))
for building in buildings:
    add_marker(building, marker_cluster)
marker_cluster.add_to(m)

# Display the finished map as the cell output.
m
# Map 4: percentage of residents without health insurance per census
# block group, with the building markers layered on top.
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

folium.Choropleth(
    geo_data=geojson,
    # The layer name was previously 'Population' (copied from the map above),
    # which mislabels this layer anywhere the name is shown, e.g. in a
    # LayerControl.
    name='Percent Uninsured',
    data=cbg_data,
    columns=['census_block_group', 'percent_uninsured'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Oranges',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Percentage With No Health Insurance (%)',
).add_to(m)

# A small cluster radius keeps nearby buildings visible as separate pins.
marker_cluster = MarkerCluster(options={'maxClusterRadius': 10})
for building in buildings:
    add_marker(building, marker_cluster)
m.add_child(marker_cluster)

# Display the finished map as the cell output.
m
def add_marker(building, marker_cluster):
    """Attach a styled pin for one building to ``marker_cluster``.

    ``building`` is a dict with the keys ``'type'``, ``'coordinates'``,
    ``'name'`` and ``'address'``. The ``'type'`` value selects the icon and
    colour of the pin and must be ``'Apartments'`` or ``'Nursing Home'``;
    any other value raises ``KeyError``. Nothing is returned.
    """
    # Icon glyph and pin colour for each supported building type.
    pin_styles = {
        'Apartments': {'icon': 'home', 'color': 'orange'},
        'Nursing Home': {'icon': 'heart', 'color': 'red'},
    }
    style = pin_styles[building['type']]

    position = building['coordinates']
    # Hover text: building type in italics, then name and address.
    hover_text = f"<i>[{building['type']}]</i><br/>{building['name']}<br/>\n{building['address']}"
    pin_icon = folium.Icon(color=style['color'], icon=style['icon'])

    marker_cluster.add_child(
        folium.Marker(location=position, tooltip=hover_text, icon=pin_icon)
    )
(after this, you can run the cells above to generate the maps)
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from geopy.geocoders import Nominatim
import branca
import branca.colormap as cm
import math
# One-off geocoding lookup (network call to the Nominatim service) used to
# find coordinates for a street reference in Fairfield County.
locator = Nominatim(user_agent='myGeocoder')
location = locator.geocode('S-20-151, Fairfield, SC')
# geocode() returns None when the service finds no match; guard against that
# instead of failing with AttributeError on `location.latitude`.
if location is None:
    print('No geocoding result found')
else:
    print(location, 'Latitude = {}, Longitude = {}'.format(location.latitude, location.longitude))
# Hand-collected points of interest drawn as markers on the maps.
# Each entry has a display name, a street address and (latitude, longitude)
# coordinates; a 'type' key is added below.
apartments = [
    {
        'name': 'Lamplighter Apartments',
        'address': 'Winnsboro Mills, SC 29180',
        'coordinates': (34.350296, -81.086537),
    },
    {
        'name': 'Deer Wood Apartments',
        'address': '647 US-321 BYP, Winnsboro, SC 29180',
        'coordinates': (34.361933, -81.098159),
    },
    {
        'name': 'Winnsboro Arms Apartments',
        'address': '61 Winnsboro Arms Dr, Winnsboro, SC 29180',
        'coordinates': (34.372125, -81.105137),
    },
    {
        'name': 'Gibson APT',
        'address': '308 Palmer St, Winnsboro, SC 29180',
        'coordinates': (34.374366, -81.088088),
    },
    {
        'name': 'Castlewood Apartments Phase I',
        'address': '200 Castlewood Dr, Winnsboro, SC 29180',
        'coordinates': (34.369261, -81.095067),
    },
    {
        # Name previously misspelled as 'Aparrtments'; it is shown to users
        # in the marker tooltip.
        'name': 'Laurelwood Apartments',
        'address': '16A Laurel Wood Ct, Winnsboro, SC 29180',
        'coordinates': (34.372969, -81.094324),
    },
]
nursing_homes = [
    {
        'name': 'PruittHealth - Ridgeway',
        'address': '213 Tanglewood Court, Ridgeway, SC 29130',
        'coordinates': (34.302030, -80.964782),
    },
    {
        'name': 'Blue Ridge in the Fields',
        'address': '117 Bellefield Rd, Ridgeway, SC 29130',
        'coordinates': (34.330878, -80.907010),
    },
    {
        # NOTE(review): same address as the entry above but slightly
        # different coordinates -- confirm these are two distinct facilities.
        'name': 'Ridgeway Manor Healthcare Center',
        'address': '117 Bellefield Rd, Ridgeway, SC 29130',
        'coordinates': (34.329630, -80.906810),
    },
]

# Tag every entry with its building type (the dicts are updated in place)
# and gather them, apartments first, into the single list the map cells use.
buildings = []
for building_type, group in (('Apartments', apartments), ('Nursing Home', nursing_homes)):
    for building in group:
        building['type'] = building_type
        buildings.append(building)
import os
from pathlib import Path
import json
import linecache
import functools
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from shapely.ops import nearest_points
from shapely.geometry import LineString
# Location of the full SafeGraph Open Census Data download.
DATA_PATH = Path('/data/safegraph/safegraph_open_census_data')
# Location of the per-county slices of that dataset (included with the repo).
PREPROCESSED_DATA_PATH = Path('../../../data/preprocessed/safegraph/safegraph_open_census_data')

county_fips_code = '45039'  # Fairfield County, South Carolina
county_directory = PREPROCESSED_DATA_PATH.joinpath("data/county", county_fips_code)

# The per-county slices were generated once with the IPython shell commands
# below. The resulting data is now included with the repo, so they do not
# need to be run any more; they are kept for reference only.
#
#   !mkdir -p {PREPROCESSED_DATA_PATH}
#   census_data_file_names = !ls {DATA_PATH}/data/ | grep [0-9]
#   !mkdir -p {county_directory}
#   for file_name in census_data_file_names:
#       !touch {county_directory}/{file_name}
#       print(county_directory/file_name)
#       !head -n 1 "{DATA_PATH}/data/{file_name}" > {county_directory}/{file_name}
#       !cat "{DATA_PATH}/data/{file_name}" | grep ^{county_fips_code}.*$ >> {county_directory}/{file_name}
table_ids = [
'B01001e1', # SEX BY AGE: Total: Total population -- (Estimate),Sex By Age, Total, Total Population -- (Estimate),,,,,
#'B00001e1', # UNWEIGHTED SAMPLE COUNT OF THE POPULATION: Total: Total population -- (Estimate),Unweighted Sample Count Of The Population, Total, Total Population -- (Estimate),,,,,
#'B00001m1', # UNWEIGHTED SAMPLE COUNT OF THE POPULATION: Total: Total population -- (Margin of Error),Unweighted Sample Count Of The Population, Total, Total Population -- (Margin Of Error),,,,,
'B19013e1', # Median Household Income
#'B00002e1', # UNWEIGHTED SAMPLE HOUSING UNITS: Total: Housing units -- (Estimate),Unweighted Sample Housing Units, Total, Housing Units -- (Estimate),,,,,
#'B00002m1', # UNWEIGHTED SAMPLE HOUSING UNITS: Total: Housing units -- (Margin of Error),Unweighted Sample Housing Units, Total, Housing Units -- (Margin Of Error),,,,,
'B25001e1', # HOUSING UNITS: Total: Housing units -- (Estimate),Housing Units, Total, Housing Units -- (Estimate),,,,,
#'B25001m1', # HOUSING UNITS: Total: Housing units -- (Margin of Error),Housing Units, Total, Housing Units -- (Margin Of Error),,,,,
# 'B27010e35', # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 to 64 years: With one type of health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 35 To 64 Years, With One Type Of Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
'B27010e17', # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: Under 18 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, Under 18 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
'B27010e33', # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 18 to 34 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 18 To 34 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
'B27010e50', # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 35 to 64 years: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 35 To 64 Years, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
'B27010e66', # TYPES OF HEALTH INSURANCE COVERAGE BY AGE: 65 years and over: No health insurance coverage: Civilian noninstitutionalized population -- (Estimate),Types Of Health Insurance Coverage By Age, 65 Years And Over, No Health Insurance Coverage, Civilian Noninstitutionalized Population -- (Estimate),,,,
'C17002e1', #RATIO OF INCOME TO POVERTY LEVEL IN THE PAST 12 MONTHS: Total: Population for whom poverty status is determined -- (Estimate),Ratio Of Income To Poverty Level In The Past 12 Months, Total, Population For Whom Poverty Status Is Determined -- (Estimate),,,,,
# unavailable:
# 'B17017e10', # POVERTY STATUS IN THE PAST 12 MONTHS BY HOUSEHOLD TYPE BY AGE OF HOUSEHOLDER: Total: Households -- (Estimate),Poverty Status In The Past 12 Months By Household Type By Age Of Householder, Total, Households -- (Estimate),,,,,
]
# Load the dataset's field descriptions and show the rows that describe
# the census tables selected in `table_ids`.
cbg_field_desc = pd.read_csv(PREPROCESSED_DATA_PATH.joinpath('metadata/cbg_field_descriptions.csv'))
cbg_field_desc.loc[cbg_field_desc['table_id'].isin(table_ids)]
# Load the per-county census slices and join them on the block-group id.
# `census_block_group` is read as a string so the ids are not parsed as numbers.
county_files = ['cbg_b00.csv', 'cbg_b01.csv', 'cbg_b19.csv', 'cbg_b25.csv', 'cbg_b27.csv', 'cbg_c17.csv']
dfs = [pd.read_csv(county_directory / file, dtype={'census_block_group': str}) for file in county_files]
merged = dfs[0]
for df in dfs[1:]:
    merged = pd.merge(merged, df, on=['census_block_group'])

# Keep only the id plus the selected tables. The explicit copy makes
# cbg_data an independent frame, so the derived columns added below do not
# trigger pandas' SettingWithCopyWarning.
cbg_data = merged[['census_block_group'] + table_ids].copy()
cbg_data

cbg_data['B01001e1'].sum()  # total population

# A zero denominator would produce +/-inf, which breaks the choropleth colour
# binning; treat such block groups as "no data" (NaN) instead.
housing_units = cbg_data['B25001e1'].replace(0, float('nan'))
population = cbg_data['B01001e1'].replace(0, float('nan'))

# Average people per housing unit.
cbg_data['household_size'] = cbg_data['B01001e1'] / housing_units
# Uninsured count = sum of the four "no health insurance coverage" age
# brackets. skipna=False keeps a missing bracket as NaN instead of silently
# counting it as zero.
uninsured_columns = ['B27010e17', 'B27010e33', 'B27010e50', 'B27010e66']
cbg_data['num_uninsured'] = cbg_data[uninsured_columns].sum(axis=1, skipna=False)
cbg_data['percent_uninsured'] = cbg_data['num_uninsured'] * 100.0 / population
cbg_data
# Census Block Groups
def geojson_for_county(state_abbreviation="SC",
                       county_name="Fairfield County",
                       county_fips_code='45039',
                       data_path=DATA_PATH):
    '''
    Return the census-block-group GeoJSON for one county as a parsed dict.

    If a preprocessed slice already exists at
    PREPROCESSED_DATA_PATH/geometry/fips/<county_fips_code>/cbg.geojson it is
    loaded and returned directly. Otherwise the county's features are
    extracted from the full ``geometry/cbg.geojson`` file of the SafeGraph
    census dataset under ``data_path`` (so that file must be available), the
    result is written to the preprocessed location for next time, and the
    parsed GeoJSON is returned.

    Parameters:
        state_abbreviation: value of the "State" property to match.
        county_name: value of the "County" property to match.
        county_fips_code: FIPS code used to name the preprocessed slice.
        data_path: root directory (a Path) of the full SafeGraph dataset.

    Raises:
        FileNotFoundError: neither the preprocessed slice nor the full
            geometry file exists.
        IndexError: no matching feature was found in the searched line range.
    '''
    from collections import deque  # only needed when extracting from the full file

    cached_path = PREPROCESSED_DATA_PATH / 'geometry/fips' / county_fips_code / 'cbg.geojson'
    if cached_path.exists():
        with open(cached_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    geojson_path = data_path / 'geometry/cbg.geojson'

    # Line range (1-indexed, end exclusive) of the full file that is searched
    # for the county of interest. It was found by inspection for the default
    # state; the file is sorted by state.
    # TODO: write an explicit binary search here, if we need to generalize
    # to other states/counties.
    line_start_search = 170000
    line_end_search = 180000
    county_marker = f'"State": "{state_abbreviation}", "County": "{county_name}"'

    # Single streaming pass: keep the first 5 lines (GeoJSON header), the last
    # 2 lines (footer), and every matching feature line inside the range.
    header = []
    footer = deque(maxlen=2)
    county_cbgs = []
    with open(geojson_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            text = line.rstrip('\n')
            if line_number <= 5:
                header.append(text)
            footer.append(text)
            if line_start_search <= line_number < line_end_search and county_marker in text:
                county_cbgs.append(text.strip())
    print(len(county_cbgs))

    if not county_cbgs:
        raise IndexError(
            f'no census block groups found for {county_name}, {state_abbreviation} '
            f'in lines {line_start_search}-{line_end_search} of {geojson_path}'
        )

    # Remove the trailing "," from the last feature: it would otherwise be
    # followed directly by the closing footer and break the JSON parsing.
    if county_cbgs[-1].endswith(','):
        county_cbgs[-1] = county_cbgs[-1][:-1]

    geojson = '\n'.join(header + county_cbgs + list(footer))
    # Make sure the per-county directory exists before caching the slice.
    cached_path.parent.mkdir(parents=True, exist_ok=True)
    with open(cached_path, 'w', encoding='utf-8') as f:
        f.write(geojson)
    return json.loads(geojson)
# County geometry shared by every choropleth in this notebook.
geojson = geojson_for_county()
# Number of census block groups contained in the county geometry.
len([feature['properties']['CensusBlockGroup'] for feature in geojson['features']])
# For debugging, inspect a single feature's properties with:
#   geojson['features'][0]['properties']
# Per-parcel property assessor data for the county, indexed by parcel id.
PROPERTY_DATA_PATH = Path('../../../data/preprocessed/property_assessor/fips/').joinpath(county_fips_code)
property_values_df = pd.read_csv(PROPERTY_DATA_PATH.joinpath('property_data_detailed.csv'))
# Keep the ids in file order before the column is moved into the index.
parcel_ids = list(property_values_df['ParcelId'])
property_values_df = property_values_df.set_index('ParcelId')
Size of each dot represents the area of each land parcel.
Color of the dot represents the parcel's value, as determined by public tax assessment records.
Click on a dot to get more information.
# Property map: median household income choropleth with one circle per land
# parcel. Circle size follows parcel area; circle colour follows the parcel's
# tax market value.
m = folium.Map(location=[34.4, -81.1], zoom_start=10.5)

home_values = property_values_df['tax_market_value'].dropna()
# Reference thresholds, currently unused by the map below.
high_home_value = 200000  # home_values.quantile(.8)
low_home_value = 50000  # home_values.quantile(.2)

folium.Choropleth(
    geo_data=geojson,
    name='Median Household Income',
    data=cbg_data,
    columns=['census_block_group', 'B19013e1'],
    key_on='feature.properties.CensusBlockGroup',
    fill_color='Blues',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Median Household Income ($)'
).add_to(m)

# Red-to-white colour scale anchored on quantiles of the home values: the
# 10th percentile maps to full red and the 85th percentile to white.
quantile_stops = home_values.quantile([0.1, 0.3, 0.50, 0.75, 0.85]).tolist()
colormap = cm.LinearColormap(
    colors=[(255, 0, 0),
            (255, 63, 63),
            (255, 127, 127),
            (255, 191, 191),
            (255, 255, 255)],
    index=quantile_stops,
    vmin=quantile_stops[0],
    vmax=quantile_stops[-1]
)
colormap.caption = 'Home Value ($)'

# Link prefixes for the county's online tax record and parcel map pages;
# the parcel id is appended as the KeyValue.
record_url = 'https://beacon.schneidercorp.com/Application.aspx?AppID=796&LayerID=11834&PageTypeID=4&PageID=5738&KeyValue='
map_url = 'https://beacon.schneidercorp.com/Application.aspx?AppID=796&LayerID=11834&PageTypeID=1&PageID=5735&KeyValue='

# Only parcels with a known position and a known value can be drawn.
plottable_parcels = property_values_df.dropna(subset=['latitude', 'longitude', 'tax_market_value'])

# Iterate the rows directly rather than looking each parcel id up again:
# this also works when a parcel id appears more than once in the data.
for parcel_id, record in plottable_parcels.iterrows():
    # Default radius, used when the parcel area is unknown or invalid.
    radius = 50
    area_sq_ft = record['area_sq_ft']
    if pd.notna(area_sq_ft) and area_sq_ft >= 0:
        radius = math.sqrt(area_sq_ft / math.pi / 25)

    tooltip_string = (
        f"\n${record['tax_market_value']:,} <br />\n"
        f"[<a href='{record_url}{parcel_id}'>Tax Record</a>] <br />\n"
        f"[<a href='{map_url}{parcel_id}'>Map</a>] <br />\n"
    )
    # Links are not clickable inside a hover tooltip, so the same text is
    # also provided as a click popup.
    folium.Circle(
        radius=radius,
        location=(record['latitude'], record['longitude']),
        tooltip=tooltip_string + '(Click map marker to access links)',
        popup=tooltip_string,
        color=colormap(record['tax_market_value']),
        fill=True,
        fill_opacity=1.0
    ).add_to(m)

# The circles are added straight to the map. The empty MarkerCluster that
# used to be attached here held no markers and only produced an unnamed
# entry in the layer control, so it is no longer added.
m.add_child(colormap)
folium.LayerControl().add_to(m)
m